<!DOCTYPE html><html lang="zh-CN" data-theme="light"><head><meta charset="UTF-8"><meta http-equiv="X-UA-Compatible" content="IE=edge"><meta name="viewport" content="width=device-width,initial-scale=1"><title>《强化学习》(第 2 版) 习题 6 | 云玩家</title><meta name="keywords" content="机器学习,强化学习,习题"><meta name="author" content="云玩家"><meta name="copyright" content="云玩家"><meta name="format-detection" content="telephone=no"><meta name="theme-color" content="#ffffff"><meta name="description" content="6.1在 $V$ 被不断更新的情况下, 式子左边的 $V(S_t)$ 与右边的是不等价的. 因为右边的 $V(S_t)$ 是没有更新的, 而左边的 $V(S_t)$ 却是已经更新的. 因此右边得再加上 $\alpha\delta_t$ . 6.2我们将整个走高速以及到家的过程简化成一个状态 $s_2$ , 从新办公楼到新停车场到高速的过程简化成 $s_1$ , 显然 $V(s_2)$ 与之前的是一">
<meta property="og:type" content="article">
<meta property="og:title" content="《强化学习》(第 2 版) 习题 6">
<meta property="og:url" content="http://yunist.cn/ML/RL/reinforcement_learning/6/index.html">
<meta property="og:site_name" content="云玩家">
<meta property="og:description" content="6.1在 $V$ 被不断更新的情况下, 式子左边的 $V(S_t)$ 与右边的是不等价的. 因为右边的 $V(S_t)$ 是没有更新的, 而左边的 $V(S_t)$ 却是已经更新的. 因此右边得再加上 $\alpha\delta_t$ . 6.2我们将整个走高速以及到家的过程简化成一个状态 $s_2$ , 从新办公楼到新停车场到高速的过程简化成 $s_1$ , 显然 $V(s_2)$ 与之前的是一">
<meta property="og:locale" content="zh_CN">
<meta property="og:image" content="http://yunist.cn/ML/RL/reinforcement_learning/6/RL.png">
<meta property="article:published_time" content="2021-08-31T07:50:09.000Z">
<meta property="article:modified_time" content="2021-09-11T04:19:07.339Z">
<meta property="article:author" content="云玩家">
<meta property="article:tag" content="机器学习">
<meta property="article:tag" content="强化学习">
<meta property="article:tag" content="习题">
<meta name="twitter:card" content="summary">
<meta name="twitter:image" content="http://yunist.cn/ML/RL/reinforcement_learning/6/RL.png"><link rel="shortcut icon" href="/img/favicon.ico"><link rel="canonical" href="http://yunist.cn/ML/RL/reinforcement_learning/6/"><link rel="preconnect" href="//cdn.jsdelivr.net"/><link rel="preconnect" href="//s4.cnzz.com"/><link rel="preconnect" href="//busuanzi.ibruce.info"/><link rel="stylesheet" href="/css/index.css"><link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@fortawesome/fontawesome-free/css/all.min.css" media="print" onload="this.media='all'"><script async="async" data-pjax="data-pjax" src="https://s4.cnzz.com/z_stat.php?id=1278869595&amp;web_id=1278869595"></script><script>const GLOBAL_CONFIG = { 
  // Site-wide settings exposed as a global constant.
  // NOTE(review): presumably read by the theme scripts loaded later in this
  // page (/js/utils.js, /js/main.js, /js/search/local-search.js) — confirm
  // before renaming or removing any key.
  root: '/',
  algolia: undefined,
  // "${query}" is a literal placeholder inside a plain string (not a template
  // literal); presumably substituted by the search script — TODO confirm.
  localSearch: {"path":"search.xml","languages":{"hits_empty":"找不到您查询的内容：${query}"}},
  translate: undefined,
  noticeOutdate: undefined,
  // NOTE(review): "highlighjs" looks like a misspelling, but it is presumably
  // compared literally by the theme script — verify before "fixing" it.
  highlight: {"plugin":"highlighjs","highlightCopy":true,"highlightLang":true},
  // User-facing messages for the copy feature.
  copy: {
    success: '复制成功',
    error: '复制错误',
    noSupport: '浏览器不支持'
  },
  relativeDate: {
    homepage: false,
    post: false
  },
  runtime: '',
  // User-facing suffixes for relative dates ("just now", "minutes ago", ...).
  date_suffix: {
    just: '刚刚',
    min: '分钟前',
    hour: '小时前',
    day: '天前',
    month: '个月前'
  },
  copyright: undefined,
  lightbox: 'fancybox',
  Snackbar: undefined,
  // CDN locations of optional libraries. The jQuery and fancybox URLs use
  // "@latest", so their versions are not pinned.
  source: {
    jQuery: 'https://cdn.jsdelivr.net/npm/jquery@latest/dist/jquery.min.js',
    justifiedGallery: {
      js: 'https://cdn.jsdelivr.net/npm/justifiedGallery/dist/js/jquery.justifiedGallery.min.js',
      css: 'https://cdn.jsdelivr.net/npm/justifiedGallery/dist/css/justifiedGallery.min.css'
    },
    fancybox: {
      js: 'https://cdn.jsdelivr.net/npm/@fancyapps/fancybox@latest/dist/jquery.fancybox.min.js',
      css: 'https://cdn.jsdelivr.net/npm/@fancyapps/fancybox@latest/dist/jquery.fancybox.min.css'
    }
  },
  isPhotoFigcaption: false,
  islazyload: false,
  isanchor: false
}</script><script src="/js/yuntools.js"></script><script id="config-diff">var GLOBAL_CONFIG_SITE = { 
  // Per-page flags for this document (a post page with a table of contents).
  // NOTE(review): presumably consumed by the theme scripts — confirm.
  isPost: true,
  isHome: false,
  isHighlightShrink: false,
  isToc: true,
  // Same instant as the page's article:modified_time meta tag
  // (2021-09-11T04:19:07Z), written here as UTC+8 local time.
  postUpdate: '2021-09-11 12:19:07'
}</script><noscript><style type="text/css">
  /* These rules sit inside <noscript>, so they only apply when scripting is
     disabled. NOTE(review): presumably the main stylesheet keeps these
     elements transparent/hidden until a script reveals them — confirm. */
  #nav {
    opacity: 1
  }
  .justified-gallery img {
    opacity: 1
  }

  /* Force the post dates to be displayed inline. */
  #recent-posts time,
  #post-meta time {
    display: inline !important
  }
</style></noscript><script>(win=>{
    /**
     * Tiny localStorage wrapper that stores each value together with an
     * expiry timestamp, as JSON of the form {"value": ..., "expiry": <ms>}.
     */
    win.saveToLocal = {
      /**
       * Persist `value` under `key` for `ttl` days.
       * A ttl of exactly 0 means "do not persist" and is a no-op.
       */
      set: function setWithExpiry(key, value, ttl) {
        if (ttl === 0) return
        const now = new Date()
        const expiryDay = ttl * 86400000 // 86400000 ms = one day
        const item = {
          value: value,
          expiry: now.getTime() + expiryDay,
        }
        localStorage.setItem(key, JSON.stringify(item))
      },

      /**
       * Read the value stored under `key`.
       * Returns undefined when nothing is stored, when the entry has expired
       * (the expired entry is removed), or when the entry cannot be read or
       * is not an object written by set().
       */
      get: function getWithExpiry(key) {
        let item
        try {
          const itemStr = localStorage.getItem(key)

          if (!itemStr) {
            return undefined
          }
          item = JSON.parse(itemStr)
        } catch (err) {
          // Storage access was refused or the entry is not valid JSON.
          // Treat it as missing instead of throwing: this getter runs in the
          // inline <head> script, where an exception would abort the rest of
          // that script.
          return undefined
        }

        // JSON such as "null", "42" or "\"dark\"" parses fine but is not an
        // entry produced by set(); reading .expiry on null would throw.
        if (item === null || typeof item !== 'object') {
          return undefined
        }

        const now = new Date()

        if (now.getTime() > item.expiry) {
          localStorage.removeItem(key)
          return undefined
        }
        return item.value
      }
    }
  
    /**
     * Load an external script by appending an async <script> tag to <head>.
     * The returned promise resolves (with no value) once the script has
     * loaded and rejects with the error event if loading fails.
     */
    win.getScript = url => new Promise((resolve, reject) => {
      const tag = document.createElement('script')

      // Shared handler for onload and the legacy onreadystatechange event.
      const finish = function () {
        const state = this.readyState
        // When a readyState is reported, wait until it is 'loaded' or 'complete'.
        const stillLoading = Boolean(state) && state !== 'loaded' && state !== 'complete'
        if (stillLoading) return
        // Detach both handlers so the promise is settled only once.
        tag.onreadystatechange = null
        tag.onload = null
        resolve()
      }

      tag.src = url
      tag.async = true
      tag.onerror = reject
      tag.onreadystatechange = finish
      tag.onload = finish
      document.head.appendChild(tag)
    })
  
      /**
       * Switch the page to the dark theme: sets data-theme="dark" on <html>
       * and, if a theme-color meta tag exists, sets its content to #0d0d0d.
       */
      win.activateDarkMode = function () {
        document.documentElement.setAttribute('data-theme', 'dark')
        const themeColorMeta = document.querySelector('meta[name="theme-color"]')
        if (themeColorMeta === null) return
        themeColorMeta.setAttribute('content', '#0d0d0d')
      }
      /**
       * Switch the page to the light theme: sets data-theme="light" on <html>
       * and, if a theme-color meta tag exists, sets its content to #ffffff.
       */
      win.activateLightMode = function () {
        document.documentElement.setAttribute('data-theme', 'light')
        const themeColorMeta = document.querySelector('meta[name="theme-color"]')
        if (themeColorMeta === null) return
        themeColorMeta.setAttribute('content', '#ffffff')
      }
      // Re-apply the theme stored under 'theme'; any value other than
      // 'dark' or 'light' leaves the markup's default theme untouched.
      const savedTheme = saveToLocal.get('theme')
      switch (savedTheme) {
        case 'dark':
          activateDarkMode()
          break
        case 'light':
          activateLightMode()
          break
      }

      // Re-apply the stored sidebar state: 'hide' adds the hide-aside class
      // to <html>, any other stored value removes it, and no stored value
      // changes nothing.
      const savedAside = saveToLocal.get('aside-status')
      if (savedAside !== undefined) {
        document.documentElement.classList.toggle('hide-aside', savedAside === 'hide')
      }
    })(window)</script><link rel="stylesheet" href="https://cdn.jsdelivr.net/gh/cnyist/blog/css/font.css"><meta name="generator" content="Hexo 5.4.0"></head><body><div id="web_bg"></div><div id="sidebar"><div id="menu-mask"></div><div id="sidebar-menus"><div class="author-avatar"><img class="avatar-img" src="https://cdn.jsdelivr.net/gh/cnyist/blog/img/avatar.svg" onerror="onerror=null;src='/img/friend_404.gif'" alt="avatar"/></div><div class="site-data"><div class="data-item is-center"><div class="data-item-link"><a href="/archives/"><div class="headline">文章</div><div class="length-num">97</div></a></div></div><div class="data-item is-center"><div class="data-item-link"><a href="/tags/"><div class="headline">标签</div><div class="length-num">40</div></a></div></div><div class="data-item is-center"><div class="data-item-link"><a href="/categories/"><div class="headline">分类</div><div class="length-num">27</div></a></div></div></div><hr/><div class="menus_items"><div class="menus_item"><a class="site-page" href="/"><i class="fa-fw fas fa-home"></i><span> 主页</span></a></div><div class="menus_item"><a class="site-page" href="/archives/"><i class="fa-fw fas fa-archive"></i><span> 时间轴</span></a></div><div class="menus_item"><a class="site-page" href="/tags/"><i class="fa-fw fas fa-tags"></i><span> 标签</span></a></div><div class="menus_item"><a class="site-page" href="/categories/"><i class="fa-fw fas fa-folder-open"></i><span> 分类</span></a></div><div class="menus_item"><a class="site-page" href="/link/"><i class="fa-fw fas fa-link"></i><span> 友链</span></a></div><div class="menus_item"><a class="site-page" href="/chat/"><i class="fa-fw fas fa-comments"></i><span> 闲聊室</span></a></div><div class="menus_item"><a class="site-page" href="/about/"><i class="fa-fw fas fa-user"></i><span> about</span></a></div><div class="menus_item"><a class="site-page" href="javascript:void(0);"><i class="fa-fw iconfont icon-Web"></i><span> 镜像站点</span><i class="fas fa-chevron-down 
expand"></i></a><ul class="menus_item_child"><li><a class="site-page" target="_blank" rel="noopener" href="https://yunist.gitee.io"><i class="fa-fw iconfont icon-gitee-fill-round"></i><span> Gitee Pages</span></a></li></ul></div></div></div></div><div class="post" id="body-wrap"><header class="post-bg" id="page-header" style="background-image: url('/ML/RL/reinforcement_learning/6/RL.png')"><nav id="nav"><span id="blog_name"><a id="site-name" href="/">云玩家</a></span><div id="menus"><div id="search-button"><a class="site-page social-icon search"><i class="fas fa-search fa-fw"></i><span> 搜索</span></a></div><div class="menus_items"><div class="menus_item"><a class="site-page" href="/"><i class="fa-fw fas fa-home"></i><span> 主页</span></a></div><div class="menus_item"><a class="site-page" href="/archives/"><i class="fa-fw fas fa-archive"></i><span> 时间轴</span></a></div><div class="menus_item"><a class="site-page" href="/tags/"><i class="fa-fw fas fa-tags"></i><span> 标签</span></a></div><div class="menus_item"><a class="site-page" href="/categories/"><i class="fa-fw fas fa-folder-open"></i><span> 分类</span></a></div><div class="menus_item"><a class="site-page" href="/link/"><i class="fa-fw fas fa-link"></i><span> 友链</span></a></div><div class="menus_item"><a class="site-page" href="/chat/"><i class="fa-fw fas fa-comments"></i><span> 闲聊室</span></a></div><div class="menus_item"><a class="site-page" href="/about/"><i class="fa-fw fas fa-user"></i><span> about</span></a></div><div class="menus_item"><a class="site-page" href="javascript:void(0);"><i class="fa-fw iconfont icon-Web"></i><span> 镜像站点</span><i class="fas fa-chevron-down expand"></i></a><ul class="menus_item_child"><li><a class="site-page" target="_blank" rel="noopener" href="https://yunist.gitee.io"><i class="fa-fw iconfont icon-gitee-fill-round"></i><span> Gitee Pages</span></a></li></ul></div></div><div id="toggle-menu"><a class="site-page"><i class="fas fa-bars fa-fw"></i></a></div></div></nav><div 
id="post-info"><h1 class="post-title">《强化学习》(第 2 版) 习题 6</h1><div id="post-meta"><div class="meta-firstline"><span class="post-meta-date"><i class="far fa-calendar-alt fa-fw post-meta-icon"></i><span class="post-meta-label">发表于</span><time class="post-meta-date-created" datetime="2021-08-31T07:50:09.000Z" title="发表于 2021-08-31 15:50:09">2021-08-31</time><span class="post-meta-separator">|</span><i class="fas fa-history fa-fw post-meta-icon"></i><span class="post-meta-label">更新于</span><time class="post-meta-date-updated" datetime="2021-09-11T04:19:07.339Z" title="更新于 2021-09-11 12:19:07">2021-09-11</time></span><span class="post-meta-categories"><span class="post-meta-separator">|</span><i class="fas fa-inbox fa-fw post-meta-icon"></i><a class="post-meta-categories" href="/categories/%E6%9C%BA%E5%99%A8%E5%AD%A6%E4%B9%A0/">机器学习</a><i class="fas fa-angle-right post-meta-separator"></i><i class="fas fa-inbox fa-fw post-meta-icon"></i><a class="post-meta-categories" href="/categories/%E6%9C%BA%E5%99%A8%E5%AD%A6%E4%B9%A0/%E5%BC%BA%E5%8C%96%E5%AD%A6%E4%B9%A0/">强化学习</a><i class="fas fa-angle-right post-meta-separator"></i><i class="fas fa-inbox fa-fw post-meta-icon"></i><a class="post-meta-categories" href="/categories/%E6%9C%BA%E5%99%A8%E5%AD%A6%E4%B9%A0/%E5%BC%BA%E5%8C%96%E5%AD%A6%E4%B9%A0/%E3%80%8A%E5%BC%BA%E5%8C%96%E5%AD%A6%E4%B9%A0%E3%80%8B/">《强化学习》</a></span></div><div class="meta-secondline"><span class="post-meta-separator">|</span><span class="post-meta-pv-cv"><i class="far fa-eye fa-fw post-meta-icon"></i><span class="post-meta-label">阅读量:</span><span id="busuanzi_value_page_pv"></span></span><span class="post-meta-separator">|</span><span class="post-meta-commentcount"><i class="far fa-comments fa-fw post-meta-icon"></i><span class="post-meta-label">评论数:</span><a href="/ML/RL/reinforcement_learning/6/#post-comment"><span id="twikoo-count"></span></a></span></div></div></div></header><main class="layout" id="content-inner"><div id="post"><article 
class="post-content" id="article-container"><h1 id="6-1"><a href="#6-1" class="headerlink" title="6.1"></a>6.1</h1><p>在 $V$ 被不断更新的情况下, 式子左边的 $V(S_t)$ 与右边的是不等价的. 因为右边的 $V(S_t)$ 是没有更新的, 而左边的 $V(S_t)$ 却是已经更新的. 因此右边得再加上 $\alpha\delta_t$ .</p>
<h1 id="6-2"><a href="#6-2" class="headerlink" title="6.2"></a>6.2</h1><p>我们将整个走高速以及到家的过程简化成一个状态 $s_2$ , 从新办公楼到新停车场到高速的过程简化成 $s_1$ , 显然 $V(s_2)$ 与之前的是一样的, 于是我们如果采取时序差分只需一步就可以获得较准确的更新, 这样显然比蒙特卡洛整个走一遍再更新的效率要高, 且准确度是差不多的 (关键就在于 $V(s_2)$ , 即某些状态的价值仍然相当正确的) .</p>
<h1 id="6-3"><a href="#6-3" class="headerlink" title="6.3"></a>6.3</h1><p>因为无折扣, 所以 $\gamma=1$ , 又因为在多数情况下回报 $R=0$ , 所以根据 $\text{TD}$ 方法的更新公式多数情况的价值都不会改变 (仍是 $0.5$). 而第一幕, 应该是到达了左边的终止状态, 值改变了 $-0.5\alpha$ .</p>
<h1 id="6-4"><a href="#6-4" class="headerlink" title="6.4"></a>6.4</h1><p>不知如何回答.</p>
<p>但是总的来说, $\alpha$ 越小, 收敛越慢, 相同的结果也越好. 且同一参数下 (至少不是太大的参数) $\text{TD}$ 总是优于蒙特卡洛. </p>
<h1 id="6-5"><a href="#6-5" class="headerlink" title="6.5"></a>6.5</h1><p>可能是因为 $\alpha$ 过大导致难以收敛. 应该与初始化有关.</p>
<h1 id="6-6"><a href="#6-6" class="headerlink" title="6.6"></a>6.6</h1><p>蒙特卡洛和 $\text{TD}$ . 猜不到.</p>
<h1 id="6-7"><a href="#6-7" class="headerlink" title="6.7"></a>6.7</h1><p>将第 5 章中 “离轨策略 $\text{MC}$ 控制算法, 用于估计 $\pi\approx\pi_*$” 中更新公式中的 $[G-Q(S_t,A_t)]$ 改为 $[R_{t+1}+\max_aQ(S_{t+1}, a)-Q(S_t, A_t)]$ .<br>$$<br>V(S_t)=R_{t+1}+\max_aV(S_{t+1})-V(S_t)<br>$$</p>
<h1 id="6-8"><a href="#6-8" class="headerlink" title="6.8"></a>6.8</h1><p>$$<br>\begin{aligned}<br>G_t-Q(S_t,A_t)&amp;=R_{t+1}+\gamma G_{t+1}-Q(S_t,A_t)+\gamma Q(S_{t+1}, A_{t+1})-\gamma Q(S_{t+1}, A_{t+1})\\<br>&amp;=\delta_t+\gamma(G_{t+1}-Q(S_{t+1}, A_{t+1}))\\<br>&amp;=\cdots\\<br>&amp;=\sum_{k=t}^{T-1}\gamma^{k-t}\delta_k<br>\end{aligned}<br>$$</p>
<h1 id="6-9"><a href="#6-9" class="headerlink" title="6.9"></a>6.9</h1><p><a href="/jupyter/强化学习-第2版/5/在有风的网格世界中向%208%20个方向移动.html">在有风的网格世界中向 8 个方向移动</a></p>
<h1 id="6-10"><a href="#6-10" class="headerlink" title="6.10"></a>6.10</h1><p><a href="/jupyter/%E5%BC%BA%E5%8C%96%E5%AD%A6%E4%B9%A0-%E7%AC%AC2%E7%89%88/5/%E9%9A%8F%E6%9C%BA%E5%BC%BA%E5%BA%A6%E7%9A%84%E9%A3%8E.html">随机强度的风</a></p>
<h1 id="6-11"><a href="#6-11" class="headerlink" title="6.11"></a>6.11</h1><p>因为它的更新公式与 $\text{Sarsa}$ 的区别在于将 $Q(S’, A’)$ 换成了 $\max_aQ(S’,a)$ . 这其实与将你当前的策略换成贪心策略是等价的. 因此事实上你其实是用了贪心策略来更新你现在的 (不一定是贪心) 的策略, 因此是 “离轨” 的.</p>
<h1 id="6-12"><a href="#6-12" class="headerlink" title="6.12"></a>6.12</h1><p> 并不.</p>
<p>$\text{Sarsa}$ 在 $Q$ 更新前就已经选取好了下一步动作 $A’$ . 而 $\text{Q}$ 学习在更新后才选取 $A’$ . 如果执行动作后的环境 $S’$ 仍然是先前的环境 $S$ . 那么 (下一步选取的动作) 就会产生不同.</p>
<h1 id="6-13"><a href="#6-13" class="headerlink" title="6.13"></a>6.13</h1><p>应当随机地用其中一个 $Q$ , 然后用 $\epsilon$-贪心 策略选取一个动作 $A_{t+1}$ , 再在更新公式中用另一个 $Q$ 来计算 $Q(S_{t+1}, A_{t+1})$ . </p>
</article><div class="post-copyright"><div class="post-copyright__author"><span class="post-copyright-meta">文章作者: </span><span class="post-copyright-info"><a href="mailto:yunist@qq.com">云玩家</a></span></div><div class="post-copyright__type"><span class="post-copyright-meta">文章链接: </span><span class="post-copyright-info"><a href="http://yunist.cn/ML/RL/reinforcement_learning/6/">http://yunist.cn/ML/RL/reinforcement_learning/6/</a></span></div><div class="post-copyright__notice"><span class="post-copyright-meta">版权声明: </span><span class="post-copyright-info">本博客所有文章除特别声明外，均采用 <a href="https://creativecommons.org/licenses/by-nc-sa/4.0/" target="_blank">CC BY-NC-SA 4.0</a> 许可协议。转载请注明来自 <a href="http://yunist.cn" target="_blank">云玩家</a>！</span></div></div><div class="tag_share"><div class="post-meta__tag-list"><a class="post-meta__tags" href="/tags/%E6%9C%BA%E5%99%A8%E5%AD%A6%E4%B9%A0/">机器学习</a><a class="post-meta__tags" href="/tags/%E5%BC%BA%E5%8C%96%E5%AD%A6%E4%B9%A0/">强化学习</a><a class="post-meta__tags" href="/tags/%E4%B9%A0%E9%A2%98/">习题</a></div><div class="post_share"><div class="social-share" data-image="/ML/RL/reinforcement_learning/6/RL.png" data-sites="facebook,twitter,wechat,weibo,qq"></div><link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/social-share.js/dist/css/share.min.css" media="print" onload="this.media='all'"><script src="https://cdn.jsdelivr.net/npm/social-share.js/dist/js/social-share.min.js" defer></script></div></div><nav class="pagination-post" id="pagination"><div class="prev-post pull-left"><!-- - var pagination_cover = prev.cover === false ? 
prev.randomcover : prev.cover--><a href="/python/numpy/python_skill/"><img class="prev-cover" src="https://cdn.jsdelivr.net/gh/cnyist/blog/python/numpy/python_skill/python_skill.webp" onerror="onerror=null;src='/img/404.jpg'" alt="cover of previous post"><div class="pagination-info"><div class="label">上一篇</div><div class="prev_info">Python 小技巧</div></div></a></div><!-- - var pagination_cover = next.cover == false ? next.randomcover : next.cover--><div class="next-post pull-right"><a href="/math/mathematical_analysis/mathematical_analysis_practice/2/2-5/"><img class="next-cover" src="https://cdn.jsdelivr.net/gh/cnyist/blog/math/mathematical_analysis/mathematical_analysis_practice/2/2-5/2-5.jpeg" onerror="onerror=null;src='/img/404.jpg'" alt="cover of next post"><div class="pagination-info"><div class="label">下一篇</div><div class="next_info">(史济怀) 数学分析教程上册第 3 版-练习题 2.5</div></div></a></div></nav><div class="relatedPosts"><div class="headline"><i class="fas fa-thumbs-up fa-fw"></i><span> 相关推荐</span></div><div class="relatedPosts-list"><div><a href="/ML/RL/reinforcement_learning/1/" title="《强化学习》(第 2 版) 习题 1"><img class="cover" src="https://cdn.jsdelivr.net/gh/cnyist/blog/ML/RL/reinforcement_learning/1/RL.png" alt="cover"><div class="content is-center"><div class="date"><i class="far fa-calendar-alt fa-fw"></i> 2021-06-17</div><div class="title">《强化学习》(第 2 版) 习题 1</div></div></a></div><div><a href="/ML/RL/reinforcement_learning/2/" title="《强化学习》(第 2 版) 习题 2"><img class="cover" src="https://cdn.jsdelivr.net/gh/cnyist/blog/ML/RL/reinforcement_learning/2/RL.png" alt="cover"><div class="content is-center"><div class="date"><i class="far fa-calendar-alt fa-fw"></i> 2021-06-18</div><div class="title">《强化学习》(第 2 版) 习题 2</div></div></a></div><div><a href="/ML/RL/reinforcement_learning/3/" title="《强化学习》(第 2 版) 习题 3"><img class="cover" src="https://cdn.jsdelivr.net/gh/cnyist/blog/ML/RL/reinforcement_learning/3/RL.png" alt="cover"><div class="content is-center"><div 
class="date"><i class="far fa-calendar-alt fa-fw"></i> 2021-06-27</div><div class="title">《强化学习》(第 2 版) 习题 3</div></div></a></div><div><a href="/ML/RL/reinforcement_learning/4/" title="《强化学习》(第 2 版) 习题 4"><img class="cover" src="https://cdn.jsdelivr.net/gh/cnyist/blog/ML/RL/reinforcement_learning/4/RL.png" alt="cover"><div class="content is-center"><div class="date"><i class="far fa-calendar-alt fa-fw"></i> 2021-07-02</div><div class="title">《强化学习》(第 2 版) 习题 4</div></div></a></div><div><a href="/ML/RL/reinforcement_learning/5/" title="《强化学习》(第 2 版) 习题 5"><img class="cover" src="https://cdn.jsdelivr.net/gh/cnyist/blog/ML/RL/reinforcement_learning/5/RL.png" alt="cover"><div class="content is-center"><div class="date"><i class="far fa-calendar-alt fa-fw"></i> 2021-07-14</div><div class="title">《强化学习》(第 2 版) 习题 5</div></div></a></div><div><a href="/ML/RL/papers_read/RL_papers_word/" title="强化学习类论文常用表达"><img class="cover" src="https://cdn.jsdelivr.net/gh/cnyist/blog/ML/RL/papers_read/RL_papers_word/RL_papers_word.jpg" alt="cover"><div class="content is-center"><div class="date"><i class="far fa-calendar-alt fa-fw"></i> 2020-09-13</div><div class="title">强化学习类论文常用表达</div></div></a></div></div></div><hr/><div id="post-comment"><div class="comment-head"><div class="comment-headline"><i class="fas fa-comments fa-fw"></i><span> 评论</span></div></div><div class="comment-wrap"><div><div id="twikoo-wrap"></div></div></div></div></div><div class="aside-content" id="aside-content"><div class="card-widget card-info"><div class="card-info-avatar is-center"><img class="avatar-img" src="https://cdn.jsdelivr.net/gh/cnyist/blog/img/avatar.svg" onerror="this.onerror=null;this.src='/img/friend_404.gif'" alt="avatar"/><div class="author-info__name">云玩家</div><div class="author-info__description">云玩家's blog</div></div><div class="card-info-data"><div class="card-info-data-item is-center"><a href="/archives/"><div class="headline">文章</div><div class="length-num">97</div></a></div><div 
class="card-info-data-item is-center"><a href="/tags/"><div class="headline">标签</div><div class="length-num">40</div></a></div><div class="card-info-data-item is-center"><a href="/categories/"><div class="headline">分类</div><div class="length-num">27</div></a></div></div><a class="button--animated" id="card-info-btn" target="_blank" rel="noopener" href="https://github.com/cnyist"><i class="fab fa-github"></i><span>Follow Me</span></a><div class="card-info-social-icons is-center"><a class="social-icon" href="https://github.com/cnyist" target="_blank" title="Github"><i class="fab fa-github"></i></a><a class="social-icon" href="https://blog.csdn.net/chnyist" target="_blank" title="CSDN"><i class="fas iconfont icon-CN_csdnnet"></i></a><a class="social-icon" href="https://www.zhihu.com/people/cnyist" target="_blank" title="知乎"><i class="fas iconfont icon-zhihu"></i></a><a class="social-icon" href="mailto:yunist@qq.com" target="_blank" title="Email"><i class="fas fa-envelope"></i></a></div></div><div class="card-widget card-announcement"><div class="item-headline"><i class="fas fa-bullhorn card-announcement-animation"></i><span>公告</span></div><div class="announcement_content">This is my Blog</div></div><div class="sticky_layout"><div class="card-widget" id="card-toc"><div class="item-headline"><i class="fas fa-stream"></i><span>目录</span></div><div class="toc-content"><ol class="toc"><li class="toc-item toc-level-1"><a class="toc-link" href="#6-1"><span class="toc-number">1.</span> <span class="toc-text">6.1</span></a></li><li class="toc-item toc-level-1"><a class="toc-link" href="#6-2"><span class="toc-number">2.</span> <span class="toc-text">6.2</span></a></li><li class="toc-item toc-level-1"><a class="toc-link" href="#6-3"><span class="toc-number">3.</span> <span class="toc-text">6.3</span></a></li><li class="toc-item toc-level-1"><a class="toc-link" href="#6-4"><span class="toc-number">4.</span> <span class="toc-text">6.4</span></a></li><li class="toc-item 
toc-level-1"><a class="toc-link" href="#6-5"><span class="toc-number">5.</span> <span class="toc-text">6.5</span></a></li><li class="toc-item toc-level-1"><a class="toc-link" href="#6-6"><span class="toc-number">6.</span> <span class="toc-text">6.6</span></a></li><li class="toc-item toc-level-1"><a class="toc-link" href="#6-7"><span class="toc-number">7.</span> <span class="toc-text">6.7</span></a></li><li class="toc-item toc-level-1"><a class="toc-link" href="#6-8"><span class="toc-number">8.</span> <span class="toc-text">6.8</span></a></li><li class="toc-item toc-level-1"><a class="toc-link" href="#6-9"><span class="toc-number">9.</span> <span class="toc-text">6.9</span></a></li><li class="toc-item toc-level-1"><a class="toc-link" href="#6-10"><span class="toc-number">10.</span> <span class="toc-text">6.10</span></a></li><li class="toc-item toc-level-1"><a class="toc-link" href="#6-11"><span class="toc-number">11.</span> <span class="toc-text">6.11</span></a></li><li class="toc-item toc-level-1"><a class="toc-link" href="#6-12"><span class="toc-number">12.</span> <span class="toc-text">6.12</span></a></li><li class="toc-item toc-level-1"><a class="toc-link" href="#6-13"><span class="toc-number">13.</span> <span class="toc-text">6.13</span></a></li></ol></div></div><div class="card-widget card-recent-post"><div class="item-headline"><i class="fas fa-history"></i><span>最新文章</span></div><div class="aside-list"><!-- - let post_cover = article.cover--><div class="aside-list-item"><a class="thumbnail" href="/math/set_theory/unique_ordinal/" title="序数的唯一性"><img src="https://cdn.jsdelivr.net/gh/cnyist/blog/math/set_theory/unique_ordinal/unique_ordinal.png" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="序数的唯一性"/></a><div class="content"><a class="title" href="/math/set_theory/unique_ordinal/" title="序数的唯一性">序数的唯一性</a><time datetime="2022-01-12T08:26:12.000Z" title="发表于 2022-01-12 16:26:12">2022-01-12</time></div></div><!-- - let post_cover = article.cover--><div 
class="aside-list-item"><a class="thumbnail" href="/math/mathematical_analysis/mathematical_analysis_practice/3/3-1/" title="(史济怀) 数学分析教程上册第 3 版-练习题 3.1"><img src="https://cdn.jsdelivr.net/gh/cnyist/blog/math/mathematical_analysis/mathematical_analysis_practice/3/3-1/math.jpeg" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="(史济怀) 数学分析教程上册第 3 版-练习题 3.1"/></a><div class="content"><a class="title" href="/math/mathematical_analysis/mathematical_analysis_practice/3/3-1/" title="(史济怀) 数学分析教程上册第 3 版-练习题 3.1">(史济怀) 数学分析教程上册第 3 版-练习题 3.1</a><time datetime="2021-10-19T00:30:55.000Z" title="发表于 2021-10-19 08:30:55">2021-10-19</time></div></div><!-- - let post_cover = article.cover--><div class="aside-list-item"><a class="thumbnail" href="/math/mathematical_analysis/mathematical_analysis_practice/2/2-11/" title="(史济怀) 数学分析教程上册第 3 版-练习题 2.11"><img src="https://cdn.jsdelivr.net/gh/cnyist/blog/math/mathematical_analysis/mathematical_analysis_practice/2/2-11/2-11.jpeg" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="(史济怀) 数学分析教程上册第 3 版-练习题 2.11"/></a><div class="content"><a class="title" href="/math/mathematical_analysis/mathematical_analysis_practice/2/2-11/" title="(史济怀) 数学分析教程上册第 3 版-练习题 2.11">(史济怀) 数学分析教程上册第 3 版-练习题 2.11</a><time datetime="2021-09-25T03:37:28.000Z" title="发表于 2021-09-25 11:37:28">2021-09-25</time></div></div><!-- - let post_cover = article.cover--><div class="aside-list-item"><a class="thumbnail" href="/math/mathematical_analysis/mathematical_analysis_practice/2/2-10/" title="(史济怀) 数学分析教程上册第 3 版-练习题 2.10"><img src="https://cdn.jsdelivr.net/gh/cnyist/blog/math/mathematical_analysis/mathematical_analysis_practice/2/2-10/2-10.jpeg" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="(史济怀) 数学分析教程上册第 3 版-练习题 2.10"/></a><div class="content"><a class="title" href="/math/mathematical_analysis/mathematical_analysis_practice/2/2-10/" title="(史济怀) 数学分析教程上册第 3 版-练习题 2.10">(史济怀) 数学分析教程上册第 3 版-练习题 2.10</a><time datetime="2021-09-22T07:36:49.000Z" 
title="发表于 2021-09-22 15:36:49">2021-09-22</time></div></div><!-- - let post_cover = article.cover--><div class="aside-list-item"><a class="thumbnail" href="/math/mathematical_analysis/mathematical_analysis_practice/2/2-9/" title="(史济怀) 数学分析教程上册第 3 版-练习题 2.9"><img src="https://cdn.jsdelivr.net/gh/cnyist/blog/math/mathematical_analysis/mathematical_analysis_practice/2/2-9/2-9.jpeg" onerror="this.onerror=null;this.src='/img/404.jpg'" alt="(史济怀) 数学分析教程上册第 3 版-练习题 2.9"/></a><div class="content"><a class="title" href="/math/mathematical_analysis/mathematical_analysis_practice/2/2-9/" title="(史济怀) 数学分析教程上册第 3 版-练习题 2.9">(史济怀) 数学分析教程上册第 3 版-练习题 2.9</a><time datetime="2021-09-20T08:19:06.000Z" title="发表于 2021-09-20 16:19:06">2021-09-20</time></div></div></div></div></div></div></main><footer id="footer"><div id="footer-wrap"><div class="copyright">&copy;2020 - 2022 By 云玩家</div><div class="framework-info"><span>框架 </span><a target="_blank" rel="noopener" href="https://hexo.io">Hexo</a><span class="footer-separator">|</span><span>主题 </span><a target="_blank" rel="noopener" href="https://github.com/jerryc127/hexo-theme-butterfly">Butterfly</a></div></div></footer><script>document.getElementById('web_bg').style = 'background-image: url("' + get_rand_jsd_pic('cnyist', 'banner@master', '/', 70, '-min.jpg') + '")'</script></div><div id="rightside"><div id="rightside-config-hide"><button id="readmode" type="button" title="阅读模式"><i class="fas fa-book-open"></i></button><button id="darkmode" type="button" title="浅色和深色模式转换"><i class="fas fa-adjust"></i></button><button id="hide-aside-btn" type="button" title="单栏和双栏切换"><i class="fas fa-arrows-alt-h"></i></button></div><div id="rightside-config-show"><button id="rightside_config" type="button" title="设置"><i class="fas fa-cog fa-spin"></i></button><button class="close" id="mobile-toc-button" type="button" title="目录"><i class="fas fa-list-ul"></i></button><a id="to_comment" href="#post-comment" title="直达评论"><i class="fas 
fa-comments"></i></a><button id="go-up" type="button" title="回到顶部"><i class="fas fa-arrow-up"></i></button></div></div><div id="local-search"><div class="search-dialog"><div class="search-dialog__title" id="local-search-title">本地搜索</div><div id="local-input-panel"><div id="local-search-input"><div class="local-search-box"><input class="local-search-box--input" placeholder="搜索文章" type="text"/></div></div></div><hr/><div id="local-search-results"></div><span class="search-close-button"><i class="fas fa-times"></i></span></div><div id="search-mask"></div></div><div><script src="/js/utils.js"></script><script src="/js/main.js"></script><script src="/js/search/local-search.js"></script><div class="js-pjax"><script>if (!window.MathJax) {
  // Configuration object for MathJax 3. It is assigned before the MathJax
  // bundle is appended to <head>, so the bundle picks it up on startup.
  window.MathJax = {
    loader: {
      source: {
        // Map the mixed-case component name to the lower-case path.
        '[tex]/amsCd': '[tex]/amscd'
      }
    },
    tex: {
      // Accept both $...$ and \(...\) as inline-math delimiters.
      inlineMath: [ ['$','$'], ["\\(","\\)"]],
      // 'ams' selects AMS-style equation numbering (see MathJax TeX options).
      tags: 'ams'
    },
    options: {
      renderActions: {
        // Priority 10: turn every <script type="math/tex..."> element into a
        // MathItem. The script node is replaced by an empty text node that
        // marks where the math sits; "; mode=display" in the type attribute
        // selects display math.
        findScript: [10, doc => {
          for (const node of document.querySelectorAll('script[type^="math/tex"]')) {
            const display = !!node.type.match(/; *mode=display/)
            const math = new doc.options.MathItem(node.textContent, doc.inputJax[0], display)
            const text = document.createTextNode('')
            node.parentNode.replaceChild(text, node)
            math.start = {node: text, delim: '', n: 0}
            math.end = {node: text, delim: '', n: 0}
            doc.math.push(math)
          }
        }, ''],
        // Priority 200: add 'mathjax-overflow' to the parent of every
        // mjx-container that is not display="true", unless that parent
        // already carries the 'has-jax' class.
        addClass: [200,() => {
          // The :not(...) is explicitly closed here; the previous selector
          // left the parenthesis open and relied on CSS parser recovery at
          // end of input.
          document.querySelectorAll("mjx-container:not([display='true'])").forEach( node => {
            const target = node.parentNode
            if (!target.classList.contains('has-jax')) {
              target.classList.add('mathjax-overflow')
            }
          })
        }, '', false]
      }
    }
  }
  
  // Inject the MathJax 3 bundle as an async script with id "MathJax-script".
  const mathjaxTag = Object.assign(document.createElement('script'), {
    src: 'https://cdn.jsdelivr.net/npm/mathjax@3/es5/tex-mml-chtml.js',
    id: 'MathJax-script',
    async: true
  })
  document.head.appendChild(mathjaxTag)
} else {
  // window.MathJax already exists, so typeset the current content again.
  // NOTE(review): presumably reached after a pjax page swap (this script
  // lives inside the .js-pjax container) — confirm.
  // Per the MathJax 3 docs: state(0) rewinds the document's processing state,
  // texReset() resets equation labels/numbering, typeset() typesets the page.
  MathJax.startup.document.state(0)
  MathJax.texReset()
  MathJax.typeset()
}</script><script>(()=>{
  // Element that shows this post's comment count (null if it is not in the page).
  const $countDom = document.getElementById('twikoo-count')

  // Mount the Twikoo comment widget into #twikoo-wrap.
  // The former `if (false) { ... }` block that merged extra options could
  // never run, so the options are passed directly.
  const init = () => {
    twikoo.init({
      el: '#twikoo-wrap',
      envId: 'yunist-147dfe',
      region: ''
    })
  }

  // Ask Twikoo for the comment count of the current path (replies excluded)
  // and write it into the counter element; failures are logged to the console.
  const getCount = () => {
    const query = {
      envId: 'yunist-147dfe',
      region: '',
      urls: [window.location.pathname],
      includeReply: false
    }

    twikoo.getCommentsCount(query)
      .then(counts => {
        $countDom.innerText = counts[0].count
      })
      .catch(err => {
        console.error(err)
      })
  }

  // Initialise Twikoo, fetching its bundle from the CDN first when the global
  // `twikoo` object is not available yet. When `bool` is true and the counter
  // element exists, the comment count is fetched on the next timer tick.
  const loadTwikoo = (bool = false) => {
    const start = () => {
      init()
      if (bool && $countDom) setTimeout(getCount, 0)
    }

    if (typeof twikoo !== 'object') {
      getScript('https://cdn.jsdelivr.net/npm/twikoo/dist/twikoo.all.min.js').then(start)
      return
    }
    start()
  }

  // Load Twikoo immediately and fetch the comment count.
  // The previous conditions were constants ('Twikoo' === 'Twikoo' || !false,
  // then if (false)), so the lazy-load and window.loadOtherComment branches
  // could never run; only this call was reachable.
  // NOTE(review): those constants presumably came from site config rendered
  // by the theme template — regenerate rather than hand-edit if config changes.
  loadTwikoo(true)
})()</script></div><script async data-pjax src="//busuanzi.ibruce.info/busuanzi/2.3/busuanzi.pure.mini.js"></script></div></body></html>